#################################################################################
# Estimating Dynamic Ideal Points for State Supreme Courts                      #
# Jason Windett, Jeffrey J. Harden, and Matthew E.K. Hall                       #
# Fit Statistics                                                                #
# Last update: 3/2/15                                                           # 
#################################################################################
### Packages and Data ###
library(foreign)
library(logistf)

vvotes <- read.dta("fit-stats.dta")
vvotes <- vvotes[vvotes$state != "", ]

## For 80/20 CV ##
 vvotes <- vvotes[vvotes$test == 1, ]; vvotes <- vvotes[!is.na(vvotes$test) & !is.na(vvotes$idealpointcv), ] 
 vvotes$scaledidealcv <- NA
 
# Create scaled measure with CV training data #
states <- sort(unique(vvotes$state))

 for(i in 1:length(states)){
 d <- vvotes[vvotes$state == paste(states[i]) & !is.na(vvotes$idealpointcv) & !is.na(vvotes$cfscore), ]
 m.scale <- lm(cfscore ~ idealpointcv, data = d)
 vvotes$scaledidealcv[vvotes$state == paste(states[i]) & !is.na(vvotes$idealpointcv) & !is.na(vvotes$cfscore)] <- fitted(m.scale)
 }

### Fit Statistic Computations ###
results <- matrix(NA, nrow = length(states), ncol = 9)
rownames(results) <- states; colnames(results) <- c("DIP CC", "PAJID CC", "CF CC", "DIP CC Win?", "DIP APRE", "PAJID APRE", "CF APRE", "DIP APRE Win?", "N Cases")

## Loop over states ##
# for(j in 1:length(states)){
 for(j in c(1:40, 42:52)){ # Skip SC (use with scaledidealcv)
cases <- unique(vvotes[vvotes$state == states[j], ]$case)

error.dip <- error.pajid <- error.cf <- min.dip <- min.pajid <- min.cf <- co.dip <- co.pajid <- co.cf <- matrix(NA, nrow = length(cases), ncol = 1)

## Loop over cases ##
for(i in 1:length(cases)){
d <- vvotes[vvotes$state == states[j] & vvotes$case == cases[i], ]

# Dynamic ideal point measure #
# m.dip <- logistf(Vote ~ idealpoint, data = d) # All data
# m.dip <- logistf(Vote ~ idealpointcv, data = d) # 80/20 CV
 m.dip <- logistf(Vote ~ scaledidealcv, data = d) # Scaled ideal point with 80/20 CV
 
pred.dip <- ifelse(m.dip$predict > .5, 1, 0)
error.dip[i] <- sum(ifelse(m.dip$y != pred.dip, 1, 0))
min.dip[i] <- length(m.dip$y) - sum(m.dip$y == median(m.dip$y))
co.dip[i] <- (length(m.dip$y) - error.dip[i])/length(m.dip$y)

# PAJID
m.pajid <- try(logistf(Vote ~ pajid, data = d))

pred.pajid <- ifelse(m.pajid$predict > .5, 1, 0)
error.pajid[i] <- sum(ifelse(m.pajid$y != pred.pajid, 1, 0))
min.pajid[i] <- length(m.pajid$y) - sum(m.pajid$y == median(m.pajid$y))
co.pajid[i] <- (length(m.pajid$y) - error.pajid[i])/length(m.pajid$y)

# CFscores
if(j != 41){

if(sum(is.na(d$cfscore)) < .75*nrow(d)){
m.cf <- logistf(Vote ~ cfscore, data = d)

pred.cf <- ifelse(m.cf$predict > .5, 1, 0)
error.cf[i] <- sum(ifelse(m.cf$y != pred.cf, 1, 0))
min.cf[i] <- length(m.cf$y) - sum(m.cf$y == median(m.cf$y))
co.cf[i] <- (length(m.cf$y) - error.cf[i])/length(m.cf$y)
} else print("Not enough CFscores for this case")

co.dip[i] <- ifelse(is.na(co.cf[i]) == TRUE, NA, co.dip[i])
co.pajid[i] <- ifelse(is.na(co.cf[i]) == TRUE, NA, co.pajid[i])
min.dip[i] <- ifelse(is.na(min.cf[i]) == TRUE, NA, min.dip[i])
min.pajid[i] <- ifelse(is.na(min.cf[i]) == TRUE, NA, min.pajid[i])
} else print("Skipping SC for CFscores")

cat("Case", i, "of", length(cases), "\n") 
}

### Combine Results ###
results[j, 1] <- mean(na.omit(co.dip))
results[j, 2] <- mean(na.omit(co.pajid))
results[j, 3] <- mean(na.omit(co.cf))
results[j, 4] <- ifelse(results[j, 1] >= results[j, 2] & results[j, 1] >= results[j, 3], 1, 0) 

results[j, 5] <- sum(na.omit(min.dip - error.dip))/sum(na.omit(min.dip))
results[j, 6] <- sum(na.omit(min.pajid - error.pajid))/sum(na.omit(min.pajid))
results[j, 7] <- sum(na.omit(min.cf - error.cf))/sum(na.omit(min.cf))
results[j, 8] <- ifelse(results[j, 5] >= results[j, 6] & results[j, 5] >= results[j, 7], 1, 0) 

results[j, 9] <- length(cases) - sum(is.na(co.dip))

sink("fit-stats.txt")
print(results)
sink()

cat("Just completed", states[j], "\n")
}

### Save Data ###
# save.image("fit-stats-insample.RData")
# save.image("fit-stats-idealpointcv.RData")
 save.image("fit-stats-scaledidealcv.RData")